rm(list=ls()) # Clearing the global environment
# Setting the working directory
setwd('C:\\Users\\mawal\\OneDrive - Binghamton University\\Desktop\\Desktop_Folders\\Upwork\\Kreps_2')
df = read.csv('df.csv') # Importing the Study 1 Full Analysis File I converted to csv.

library(dplyr) # necessary for data manipulation
# Creating the Average Index
#################### ####################
df2 = df # Creating a copy of df

# Manipulating the Q1 variables so 1 = strongly disagree and 5 = strongly agree
df2$Q1 = ifelse(df2$Q1 == 1, 10, df2$Q1)
df2$Q1 = ifelse(df2$Q1 == 2, 9, df2$Q1)
df2$Q1 = ifelse(df2$Q1 == 3, 8, df2$Q1)
df2$Q1 = ifelse(df2$Q1 == 4, 7, df2$Q1)
df2$Q1 = ifelse(df2$Q1 == 5, 6, df2$Q1)
df2$Q1 = df2$Q1 - 5 # Keeps them on the 1-5 scale

# Doing the same for Q2 and Q3
df2$Q2 = ifelse(df2$Q2 == 11, 5, df2$Q2)
df2$Q2 = ifelse(df2$Q2 == 12, 4, df2$Q2)
df2$Q2 = ifelse(df2$Q2 == 13, 3, df2$Q2)
df2$Q2 = ifelse(df2$Q2 == 14, 2, df2$Q2)
df2$Q2 = ifelse(df2$Q2 == 15, 1, df2$Q2)

df2$Q3 = ifelse(df2$Q3 == 11, 5, df2$Q3)
df2$Q3 = ifelse(df2$Q3 == 12, 4, df2$Q3)
df2$Q3 = ifelse(df2$Q3 == 13, 3, df2$Q3)
df2$Q3 = ifelse(df2$Q3 == 14, 2, df2$Q3)
df2$Q3 = ifelse(df2$Q3 == 15, 1, df2$Q3)

# Creating the average index by averaging Q1-Q3.
df2$avg_index = (df2$Q1 +df2$Q2 +df2$Q3)/3

#######################################################################
library(MASS)

# Running the regressions with the correct number of gpt responses as the DV
regr = lm(gpt_correct ~ avg_index + Q12_1, data = df2)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(test) #coefficients
colMeans(coefs)

sqrt(diag(vcov(test))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

test1 = list(1) # Temporary place holder
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower" # 5th percentile placeholder
test1$median = 0 # 50th percentile placeholder
test1$upper = 0 # 95th percentile placeholder

for(i in 1:5) { # We have a 1-5 scale variable.
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39 # The average age in the study is 39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,] # Remove the first row, just placeholder data
test1$avg_index = 1:5 # The scale is from 1-5

# Repeating the steps above this time for the correct number of human responses as the DV
regr_human = lm(human_correct ~ avg_index + Q12_1, data = df2)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 1:5) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$avg_index = 1:5

# Generating an indicator for whether the data is GPT3 or Human
test1$type = c('GPT3','GPT3','GPT3','GPT3','GPT3')
test2$type = c('Human','Human','Human','Human','Human')

# Binding the GPT 3 and Human data together
final = rbind(test1, test2)

library(ggplot2)

# Genearting the plot
p<- ggplot(final, aes(x=avg_index, y=median, col = type )) + 
  geom_point(position=position_dodge(0.25))+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2,
                position=position_dodge(0.25))
index_regr_plot = p+labs(title="Social Media Use and Correct Response Rates", x="Social Media Use Index", y = "Mean Proportion Correct")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
index_regr_plot

ggsave('index_regr_plot.pdf', index_regr_plot)


#######################################################################
# Willingness to retweet/Like/Share
regr = lm(gpt_share_avg_index ~ avg_index + Q12_1, data = df2)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

test1 = list(1)
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower"
test1$median = 0
test1$upper = 0

for(i in 1:5) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,]
test1$avg_index = 1:5

# Running the regression this time with human and not GPT3
regr_human = lm(hum_share_avg_index ~ avg_index + Q12_1, data = df2)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

mean(df2$Q12_1, na.rm = TRUE)

test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 1:5) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$avg_index = 1:5

test1$type = c('GPT3','GPT3','GPT3','GPT3','GPT3')
test2$type = c('Human','Human','Human','Human','Human')

final = rbind(test1, test2)


p<- ggplot(final, aes(x=avg_index, y=median, col = type )) + 
  geom_point(position=position_dodge(0.25))+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2,
                position=position_dodge(0.25))
willingness_regr_plot = p+labs(title="Willingness to Share/Like/Retweet", x="Social Media Use Index", y = "Willingness Scale")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
willingness_regr_plot

ggsave('willingness_regr_plot.pdf', willingness_regr_plot)


###################################################
# Regressions with Age as the IV
library(MASS)
regr = lm(gpt_correct ~ Q12_1, data = df)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test1 = list(1)
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower"
test1$median = 0
test1$upper = 0

for(i in 18:92) { # Age ranges from 18-92
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row] + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,]
test1$age = 18:92

regr_human = lm(human_correct ~ Q12_1, data = df)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 18:92) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$age = 18:92

test1$type = 'GPT3'
test2$type = 'Human'

final = rbind(test1, test2)


p<- ggplot(final, aes(x=age, y=median, col = type )) + 
  geom_point()+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2)
age_regr_plot = p+labs(title="Respondent Age and Correct Response Rate", x="Respondent Age", y = "Mean Proportion Correct")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
age_regr_plot

ggsave('age_regr_plot.pdf', age_regr_plot)



#######################################################
# Willingness to share as the DV and Age as the IV
regr = lm(gpt_share_avg_index ~ Q12_1, data = df)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(regr)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

mean(df2$Q12_1, na.rm = TRUE)

test1 = list(1)
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower"
test1$median = 0
test1$upper = 0

for(i in 18:92) {
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,]
test1$age = 18:92


regr_human = lm(hum_share_avg_index ~ Q12_1, data = df)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

mean(df2$Q12_1, na.rm = TRUE)

test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 18:92) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$age = 18:92

test1$type = 'GPT3'
test2$type = 'Human'

final = rbind(test1, test2)


p<- ggplot(final, aes(x=age, y=median, col = type )) + 
  geom_point()+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2)
willingnessage_age_regr_plot = p+labs(title="Willingness to Share/Like/Retweet", x="Respondent Age", y = "Willingness Scale")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
willingnessage_age_regr_plot

ggsave('willingness_age_regr_plot.pdf', willingnessage_age_regr_plot)


##################################################################
# % real plots as DV


df2$real = (df2$gpt_incorrect + df2$human_correct)/10
df2$human_real = (df2$human_correct)/5
df2$gpt_real = (df2$gpt_incorrect)/5
library(MASS)

# Running the regressions with the correct number of gpt responses as the DV
regr = lm(gpt_real ~ avg_index + Q12_1, data = df2)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

test1 = list(1) # Temporary place holder
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower" # 5th percentile placeholder
test1$median = 0 # 50th percentile placeholder
test1$upper = 0 # 95th percentile placeholder

for(i in 1:5) { # We have a 1-5 scale variable.
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39 # The average age in the study is 39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,] # Remove the first row, just placeholder data
test1$avg_index = 1:5 # The scale is from 1-5

# Repeating the steps above this time for the correct number of human responses as the DV
regr_human = lm(human_real ~ avg_index + Q12_1, data = df2)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 1:5) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$avg_index = 1:5

# Generating an indicator for whether the data is GPT3 or Human
test1$type = c('GPT3','GPT3','GPT3','GPT3','GPT3')
test2$type = c('Human','Human','Human','Human','Human')

# Binding the GPT 3 and Human data together
final = rbind(test1, test2)

library(ggplot2)

# Genearting the plot
p<- ggplot(final, aes(x=avg_index, y=median, col = type )) + 
  geom_point(position=position_dodge(0.25))+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2,
                position=position_dodge(0.25))
index_regr_plot_real = p+labs(title="Social Media Use and Proportion Real Tweets", x="Social Media Use Index", y = "Proportion Real Tweets")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
index_regr_plot_real

ggsave('index_regr_plot_real.pdf', index_regr_plot_real)



###############################################
# Using real as the DV combining gpt incorrect and human correct
regr = lm(gpt_real ~ avg_index + Q12_1, data = df2)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)

test1 = list(1) # Temporary place holder
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower" # 5th percentile placeholder
test1$median = 0 # 50th percentile placeholder
test1$upper = 0 # 95th percentile placeholder

for(i in 1:5) { # We have a 1-5 scale variable.
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$avg_index[row]*i + 
      coefs$Q12_1[row]*39 # The average age in the study is 39
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,] # Remove the first row, just placeholder data
test1$avg_index = 1:5 # The scale is from 1-5


final = test1

library(ggplot2)

# Genearting the plot
p<- ggplot(final, aes(x=avg_index, y=median)) + 
  geom_point()+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2)
index_regr_plot_real2 = p+labs(title="Social Media Use and Proportion Real Tweets", x="Social Media Use Index", y = "Proportion Real Tweets")+
  theme_classic()
index_regr_plot_real2

ggsave('index_regr_plot_real2.pdf', index_regr_plot_real2)

###############################################
# Regressions with Age as the IV and real as the DV
library(MASS)
regr = lm(gpt_real ~ Q12_1, data = df)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test1 = list(1)
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower"
test1$median = 0
test1$upper = 0

for(i in 18:92) { # Age ranges from 18-92
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row] + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,]
test1$age = 18:92

regr_human = lm(human_real~ Q12_1, data = df)
summary(regr_human)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr_human), Sigma = vcov(regr_human))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr_human) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr_human))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test2 = list(1)
test2 = as.data.frame(test2)
names(test2)[names(test2) == "X1"] <- "lower"
test2$median = 0
test2$upper = 0

for(i in 18:92) { 
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row]  + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec2 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test2 = rbind(test2, vec2) # Binding the new row to the main test data frame
}

test2 = test2[-1,]
test2$age = 18:92

test1$type = 'GPT3'
test2$type = 'Human'

final = rbind(test1, test2)


p<- ggplot(final, aes(x=age, y=median, col = type )) + 
  geom_point()+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2)
age_regr_plot_real = p+labs(title="Respondent Age and Proportion Real Tweets", x="Respondent Age", y = "Proportion Real Tweets")+
  theme_classic() +
  scale_color_manual(values=c('black','gray')) +
  labs(col = "GPT3 vs Human")
age_regr_plot_real

ggsave('age_regr_plot_real.pdf', age_regr_plot_real)


###############################################
# % real as the DV and age as the IV

library(MASS)
regr = lm(real ~ Q12_1, data = df)
summary(regr)

#Using the variance-covariance matrix and coefficients to create 10000 multivariate normal distributions.
coefs = mvrnorm(n = 10000, mu = coefficients(regr), Sigma = vcov(regr))

# Doing some diagnositcs by comparing the newly created data with the regression output
coefficients(regr) #coefficients
colMeans(coefs)

sqrt(diag(vcov(regr))) #errors
apply(coefs, 2, sd)

#Turning coefs into data frame
coefs = as.data.frame(coefs)


test1 = list(1)
test1 = as.data.frame(test1)
names(test1)[names(test1) == "X1"] <- "lower"
test1$median = 0
test1$upper = 0

for(i in 18:92) { # Age ranges from 18-92
  for(row in 1:10000) { # We want to take the median and qi's from 10,0000 sammples
    coefs$xb[row] = coefs$`(Intercept)`[row] + coefs$Q12_1[row]*i
  }
  qi = coefs %>% #T  Taking quantities of interest from the xb variable
    summarize(quants = quantile(xb, probs = c(0.05, .5, 0.95)))
  vec1 = c(qi[1,1], qi[2,1], qi[3,1]) #Turning these quantities into a list
  test1 = rbind(test1, vec1) # Binding the new row to the main test data frame
}

test1 = test1[-1,]
test1$age = 18:92


final = test1


p<- ggplot(final, aes(x=age, y=median)) + 
  geom_point()+
  geom_errorbar(aes(ymin=lower, ymax=upper), width=.2)
age_regr_plot_real2 = p+labs(title="Respondent Age and Proportion Real Tweets", x="Respondent Age", y = "Proportion Real Tweets")+
  theme_classic()
age_regr_plot_real2

ggsave('age_regr_plot_real2.pdf', age_regr_plot_real2)
